library(svrep)

### Weights for waves 4 nad 5 pre-match

##excluyendo NAs': que no responde confianza, y que no vota, y que no tiene interés en política

mm <- mm %>% filter(!is.na(conf_soc_w05),!is.na(conf_soc_w04),!is.na(conf_soc_w03),
                    !is.na(pol_interesF_w04),!is.na(delta_elec_winner))

mm <- mm %>% mutate(educ_r = factor(case_when(
  educ_w01 >= 1 & educ_w01 <= 5 | educ_w01 == 99 ~ 1,
  educ_w01 == 6  ~ 2,
  educ_w01 == 7  ~ 3,
  educ_w01 == 8  ~ 4,
  educ_w01 == 9 | educ_w01 == 10 ~ 5
), labels = c("Escolar","Educacion Tecnica Superior Incompleta","Educacion Tecnica Superior Completa",
              "Profesional Incompleto","Profesional y Postgrado")),
edad_r = factor(case_when(
  edad_w01 %in% c(18:29) ~ 1,
  edad_w01 %in% c(30:39) ~ 2,
  edad_w01 %in% c(40:49) ~ 3,
  edad_w01 %in% c(50:100) ~ 4
),
labels = c("18 a 29","30 a 39","40 a 49", "50 a 65")
))

### Cambiar educación

mm$educaFr_w03 <- factor(car::recode(mm$educ_w03, "1:5=1; 6:7=2; 8:10=3; 99=1"),
                          labels=c("Secondary Ed.", "Tertiary Tech.", "Tertiary Univ"))

mm$sec <- ifelse(mm$educaFr_w03 == "Secondary Ed.",1,0)
mm$ter_tec <- ifelse(mm$educaFr_w03 == "Tertiary Tech.",1,0)
mm$ter_univ <- ifelse(mm$educaFr_w03 == "Tertiary Univ",1,0)

edad.dist <- data.frame(edad_r = c("18 a 29","30 a 39","40 a 49", "50 a 65"),
                        Freq = nrow(mm) * c(0.29, 0.21, 0.19, 0.31)) # En base a CASEN 2020
educ.dist <- data.frame(educ_r = c("Escolar","Educacion Tecnica Superior Incompleta","Educacion Tecnica Superior Completa",
                                   "Profesional Incompleto","Profesional y Postgrado"),
                        Freq = nrow(mm) * c(0.58,0.04,0.09,0.11,0.18)) # En base a CASEN 2020

k <- NULL
l <- list()

for (k in 2:20){
  # The weight is created
  
  svy.mm <- svydesign(ids = ~1, data = mm)
  svy.mm.rake <- rake(design = svy.mm, sample.margins = list(~educ_r,~edad_r),
                          population.margins = list(educ.dist,edad.dist))
  
  # A df with the values of education is created
  
  df <- as.data.frame(prop.table(table(svy.mm.rake$variables["educaFr_w03"])))
  colnames(df)[2] <- "Unweighted Mean"
  df <- merge(df,as.data.frame(prop.table(svytable(~educaFr_w03,design=svy.mm.rake))),by="educaFr_w03")
  colnames(df)[3] <- "Pre-Trim Mean"
  df <- cbind(df,Population = c(.61,.11,.25))
  df$mediana <- median(weights(svy.mm.rake))
  df$iqr <- IQR(weights(svy.mm.rake))
  max_pre_trim <- max(weights(svy.mm.rake))
  
  #The weight is trimmed
  
  svy.mm.rake <- trimWeights(svy.mm.rake, lower=1/(median(weights(svy.mm.rake))+k*IQR(weights(svy.mm.rake))), 
                                 upper=median(weights(svy.mm.rake))+k*IQR(weights(svy.mm.rake)))
  df <- cbind(df,as.data.frame(prop.table(svytable(~educaFr_w03,design=svy.mm.rake)))[2])
  colnames(df)[which(colnames(df)=="Freq")] <- "Weighted Mean" # This is the trimmed weight
  
  df$Variance <- 0
  df$Variance[1] <- svyvar(~sec,design=svy.mm.rake,na.rm=T)
  df$Variance[2] <- svyvar(~ter_tec,design=svy.mm.rake,na.rm=T)
  df$Variance[3] <- svyvar(~ter_univ,design=svy.mm.rake,na.rm=T)
  
  df$mse <- (df$`Weighted Mean`- df$Population)^2+df$Variance
  df$max_pre_trim <- max_pre_trim
  df$max_post_trim <- max(weights(svy.mm.rake))
  
  # A variable that informs the relative bias and the relative variance is created (Potter & Zheng, 2015, pg.2713)
  df$rel_bias <- 100*(df$Population-df$`Weighted Mean`)/df$Population
  df$rel_var <- 100*(df$`Weighted Mean`-df$`Pre-Trim Mean`)/df$`Pre-Trim Mean`

  df$k <- k
  
  # df is stored in a list  
  l[[k]] <- df
}

library(data.table)

#df2 <- data.table(m=names(l),dplyr::bind_rows(l))
df2 <- do.call("rbind", l)
#df2$delta_mse <- ave(df2$mse,df2$educaFr_w03, FUN = function(x) c(0, diff(x)))

df2 <- df2 %>% arrange(educaFr_w03) %>% group_by(educaFr_w03) %>% mutate(delta_mse = mse-dplyr::lag(mse,n=1))

##Grafico Online Supplment - section weights
mse_plot21 <- ggplot(data=filter(df2,educaFr_w03!="Tertiary Tech."),aes(k, mse, color=educaFr_w03)) + 
  geom_line() + geom_point() + labs(x="K",y="MSE",color="Educational Level",
                                    title="A) MSE Reduction by K") + theme_bw()

delta_plot21 <- ggplot(data=filter(df2,educaFr_w03!="Tertiary Tech."), aes(k, delta_mse, color=educaFr_w03)) + 
  geom_line() + geom_point() + labs(x="K",y="Delta MSE",color="Educational Level",
                                    title="B) MSE Change by each additional K") + theme_bw()


ggsave(file=paste0(ruta,"/Articulos/Electoral Winners y Plebiscito/Resultados/MSE Pres.png"),
       gridExtra::grid.arrange(mse_plot21,delta_plot21),dpi=1000)


#Weight trimming
k=9

svy.mm <- svydesign(ids = ~1, data = mm)
svy.mm.rake <- rake(design = svy.mm, sample.margins = list(~educ_r,~edad_r),
                    population.margins = list(educ.dist,edad.dist))
summary(weights(svy.mm.rake))

svy.mm.rake <- trimWeights(svy.mm.rake, lower=1/(median(weights(svy.mm.rake)+k*IQR(weights(svy.mm.rake)))), 
                           upper=median(weights(svy.mm.rake)+k*IQR(weights(svy.mm.rake))))
summary(weights(svy.mm.rake))

mm <- as_data_frame_with_weights(svy.mm.rake,full_wgt_name = "weight.educ.edad.pre_match",
                                     rep_wgt_prefix = "REP_WGT_")

mm <- mm %>% dplyr::select(-educ_r,-edad_r)

summary(mm$weight.educ.edad.pre_match)
